#Replication data for "The Promises and Pitfalls of 311 Data", Urban Affairs Review
#Ariel White and Kris-Stella Trump
#email Ariel (arwhi@mit.edu) with questions
#Posted January 2017

# Remove every (non-hidden) object from the current workspace so the script
# starts from a clean slate. This does not detach packages or reset options.
rm(list=ls())
library(foreign)
library(data.table)

#setwd("C:/Users/Ariel White/Dropbox (MIT)/final311repdata_forposting") #windows computer filepath for testing. replace with your own.
#setwd("/nfs/projects/c/civic_sp/nyc311/311asoutcome")

#Previous dataset - no registered voters data, only population
#Import merged precinct level calls and vote data

# Read each merged precinct-level file and immediately keep only the rows
# whose VAP is present and strictly positive (drops no-population places).
# Per the file names: 1 = all calls, 2 = "public only", 3 = "street only".
callsvote1 <- read.csv("precinctlevel311votingdata_mergeddec14.csv")
callsvote1 <- callsvote1[!is.na(callsvote1$VAP) & callsvote1$VAP > 0, ]

callsvote2 <- read.csv("precinctlevel311votingdata_mergeddec14_publiconly.csv")
callsvote2 <- callsvote2[!is.na(callsvote2$VAP) & callsvote2$VAP > 0, ]

callsvote3 <- read.csv("precinctlevel311votingdata_mergeddec14_streetonly.csv")
callsvote3 <- callsvote3[!is.na(callsvote3$VAP) & callsvote3$VAP > 0, ]

# Dimensions of each data set after the filter, then the total of the
# allyear_2010 column in the first data set.
dim(callsvote1); dim(callsvote2); dim(callsvote3)
sum(callsvote1$allyear_2010)

# Column key (codebook ELECCODES):
#   TOT10E9 = total registered voters in 2010
#   TOT10G3 = total turnout, senate race

# For each data set: 311 calls divided by VAP for the three time windows,
# followed by senate turnout as TOT10G3 / TOT10E9. The columns are added in
# the same order within each data frame as before.

# -- data set 1
callsvote1$election2010_1montharound_pc <- with(callsvote1, election2010_1montharound / VAP)
callsvote1$election2010_6monthsbefore_pc <- with(callsvote1, election2010_6monthsbefore / VAP)
callsvote1$election2010_3monthsbefore_pc <- with(callsvote1, election2010_3monthsbefore / VAP)
callsvote1$turnout2010_senate <- with(callsvote1, TOT10G3 / TOT10E9)

# -- data set 2
callsvote2$election2010_1montharound_pc <- with(callsvote2, election2010_1montharound / VAP)
callsvote2$election2010_6monthsbefore_pc <- with(callsvote2, election2010_6monthsbefore / VAP)
callsvote2$election2010_3monthsbefore_pc <- with(callsvote2, election2010_3monthsbefore / VAP)
callsvote2$turnout2010_senate <- with(callsvote2, TOT10G3 / TOT10E9)

# -- data set 3
callsvote3$election2010_1montharound_pc <- with(callsvote3, election2010_1montharound / VAP)
callsvote3$election2010_6monthsbefore_pc <- with(callsvote3, election2010_6monthsbefore / VAP)
callsvote3$election2010_3monthsbefore_pc <- with(callsvote3, election2010_3monthsbefore / VAP)
callsvote3$turnout2010_senate <- with(callsvote3, TOT10G3 / TOT10E9)

# Describe the per-VAP call rates and turnout in the first data set.
summary(callsvote1[["election2010_1montharound_pc"]])
summary(callsvote1[["election2010_6monthsbefore_pc"]])
summary(callsvote1[["election2010_3monthsbefore_pc"]])
# Authors' note: summary() did not seem to behave on the column directly, so
# the vector is copied out and summarised again as a sanity check.
e1 <- callsvote1[["election2010_1montharound_pc"]]
summary(e1)
summary(callsvote1[["turnout2010_senate"]])


# Turnout (senate race), data set 1: count and list implausibly high values.
summary(callsvote1[["turnout2010_senate"]])
sum(callsvote1$turnout2010_senate > 1, na.rm = TRUE) #high turnout - 1 place
sum(callsvote1$turnout2010_senate > 0.8, na.rm = TRUE) #high turnout - 3 places
# which() skips missing turnout values, so only rows with turnout > 1 are shown.
callsvote1[which(callsvote1$turnout2010_senate > 1),
           c("NAME10", "VAP", "election2010_6monthsbefore", "county", "TOT10G3", "TOT10E9")]

callsvote1[which(callsvote1$turnout2010_senate > 1), ] #no longer an issue; ignore errors for now.

#This place has 6 registered voters but 49 turnout: exclude
callsvote1$turnout2010_senate[which(callsvote1$turnout2010_senate > 1)] <- NA

# Same checks for data set 2.
summary(callsvote2[["turnout2010_senate"]])
sum(callsvote2$turnout2010_senate > 1, na.rm = TRUE) #high turnout - 1 place
sum(callsvote2$turnout2010_senate > 0.8, na.rm = TRUE) #high turnout - 3 places
callsvote2[which(callsvote2$turnout2010_senate > 1),
           c("NAME10", "VAP", "election2010_6monthsbefore", "county", "TOT10G3", "TOT10E9")]
#This place has 6 registered voters but 49 turnout: exclude
callsvote2$turnout2010_senate[which(callsvote2$turnout2010_senate > 1)] <- NA

# Same checks for data set 3.
summary(callsvote3[["turnout2010_senate"]])
sum(callsvote3$turnout2010_senate > 1, na.rm = TRUE) #high turnout - 1 place
sum(callsvote3$turnout2010_senate > 0.8, na.rm = TRUE) #high turnout - 3 places
callsvote3[which(callsvote3$turnout2010_senate > 1),
           c("NAME10", "VAP", "election2010_6monthsbefore", "county", "TOT10G3", "TOT10E9")]
#This place has 6 registered voters but 49 turnout: exclude
callsvote3$turnout2010_senate[which(callsvote3$turnout2010_senate > 1)] <- NA
 
# Flag very small precincts (VAP of 100 or less).
# The flag previously used VAP < 100 while the row filter below keeps only
# VAP > 100, so precincts with VAP exactly 100 were dropped without ever being
# flagged. The flag now uses the same cut-off as the filter, so the diagnostic
# counts below describe exactly the precincts that are retained. The filter
# itself is unchanged, so the analysis sample is unchanged.
callsvote1$exclude <- callsvote1$VAP <= 100
callsvote2$exclude <- callsvote2$VAP <= 100
callsvote3$exclude <- callsvote3$VAP <= 100

#High numbers of calls per capita (among precincts that will be kept)
sum(callsvote1$election2010_6monthsbefore_pc > 1 & !callsvote1$exclude, na.rm = TRUE)
sum(callsvote2$election2010_6monthsbefore_pc > 1 & !callsvote2$exclude, na.rm = TRUE)
sum(callsvote3$election2010_6monthsbefore_pc > 1 & !callsvote3$exclude, na.rm = TRUE)

###NB removing v. small precincts here - obs drop from 6120 to 5869
callsvote1 <- callsvote1[callsvote1$VAP > 100, ]
callsvote2 <- callsvote2[callsvote2$VAP > 100, ]
callsvote3 <- callsvote3[callsvote3$VAP > 100, ]

# Race/ethnicity shares: each group count divided by POP100, computed the
# same way for all three data sets.
callsvote1$pctblack <- with(callsvote1, NH_DOJ_BLK / POP100)
callsvote1$pcthispanic <- with(callsvote1, HISPANIC / POP100)
callsvote1$pctasian <- with(callsvote1, NH_DOJ_ASN / POP100)

callsvote2$pctblack <- with(callsvote2, NH_DOJ_BLK / POP100)
callsvote2$pcthispanic <- with(callsvote2, HISPANIC / POP100)
callsvote2$pctasian <- with(callsvote2, NH_DOJ_ASN / POP100)

callsvote3$pctblack <- with(callsvote3, NH_DOJ_BLK / POP100)
callsvote3$pcthispanic <- with(callsvote3, HISPANIC / POP100)
callsvote3$pctasian <- with(callsvote3, NH_DOJ_ASN / POP100)


# Scatterplots of per-VAP 311 calls against senate turnout. One PDF per data
# set, each with three side-by-side panels (1 month around, 3 months before,
# 6 months before the election). Axis labels are given explicitly, so drawing
# through with() leaves the figures unchanged.

# -- data set 1
pdf(file = "calls_turnout_precincts.pdf")
par(mfrow = c(1, 3))
with(callsvote1, plot(turnout2010_senate, election2010_1montharound_pc,
                      xlim = c(0, 1), ylim = c(0, 0.5),
                      xlab = "Senate 2010 Turnout",
                      ylab = "311 calls per capita, 1 month around election"))
with(callsvote1, plot(turnout2010_senate, election2010_3monthsbefore_pc,
                      xlim = c(0, 1), ylim = c(0, 1),
                      xlab = "Senate 2010 Turnout",
                      ylab = "311 calls per capita, 3 months before election"))
with(callsvote1, plot(turnout2010_senate, election2010_6monthsbefore_pc,
                      xlim = c(0, 1), ylim = c(0, 2),
                      xlab = "Senate 2010 Turnout",
                      ylab = "311 calls per capita, 6 months before election"))
dev.off()

# -- data set 2
pdf(file = "calls_turnout_precincts_publiconly.pdf")
par(mfrow = c(1, 3))
with(callsvote2, plot(turnout2010_senate, election2010_1montharound_pc,
                      xlim = c(0, 1), ylim = c(0, 0.5),
                      xlab = "Senate 2010 Turnout",
                      ylab = "311 calls per capita, 1 month around election"))
with(callsvote2, plot(turnout2010_senate, election2010_3monthsbefore_pc,
                      xlim = c(0, 1), ylim = c(0, 1),
                      xlab = "Senate 2010 Turnout",
                      ylab = "311 calls per capita, 3 months before election"))
with(callsvote2, plot(turnout2010_senate, election2010_6monthsbefore_pc,
                      xlim = c(0, 1), ylim = c(0, 2),
                      xlab = "Senate 2010 Turnout",
                      ylab = "311 calls per capita, 6 months before election"))
dev.off()

# -- data set 3
pdf(file = "calls_turnout_precincts_streetonly.pdf")
par(mfrow = c(1, 3))
with(callsvote3, plot(turnout2010_senate, election2010_1montharound_pc,
                      xlim = c(0, 1), ylim = c(0, 0.5),
                      xlab = "Senate 2010 Turnout",
                      ylab = "311 calls per capita, 1 month around election"))
with(callsvote3, plot(turnout2010_senate, election2010_3monthsbefore_pc,
                      xlim = c(0, 1), ylim = c(0, 1),
                      xlab = "Senate 2010 Turnout",
                      ylab = "311 calls per capita, 3 months before election"))
with(callsvote3, plot(turnout2010_senate, election2010_6monthsbefore_pc,
                      xlim = c(0, 1), ylim = c(0, 2),
                      xlab = "Senate 2010 Turnout",
                      ylab = "311 calls per capita, 6 months before election"))
dev.off()


# One 8x6 PDF comparing the three data sets side by side: 3-months-before
# calls per VAP against senate turnout, on common 0-0.9 axes.
pdf(file = "calls_turnout_precincts_allcallsets.pdf", width = 8, height = 6)
par(mfrow = c(1, 3))
with(callsvote1, plot(turnout2010_senate, election2010_3monthsbefore_pc,
                      xlim = c(0, .9), ylim = c(0, .9),
                      xlab = "Senate 2010 Turnout",
                      ylab = "All 311 calls per capita, 3 months before election",
                      main = "All 311 calls"))
with(callsvote2, plot(turnout2010_senate, election2010_3monthsbefore_pc,
                      xlim = c(0, .9), ylim = c(0, .9),
                      xlab = "Senate 2010 Turnout",
                      ylab = "`Public' 311 calls per capita, 3 months before election",
                      main = "'Public' 311 calls"))
with(callsvote3, plot(turnout2010_senate, election2010_3monthsbefore_pc,
                      xlim = c(0, .9), ylim = c(0, .9),
                      xlab = "Senate 2010 Turnout",
                      ylab = "`Street-level' 311 calls per capita, 3 months before election",
                      main = "'Street' 311 calls"))
dev.off()


# Turnout vs. 311 calls per VAP -- data set 1 (all calls).
# For each window: pairwise-complete correlation, then a bivariate OLS fit.
# The lm() formulas keep the `callsvote1$...` form so coefficient names and
# the printed call are unchanged.

# 1 month around the election
with(callsvote1, cor(election2010_1montharound_pc, turnout2010_senate, use = "complete.obs"))
turnout_1month <- lm(callsvote1$election2010_1montharound_pc ~ callsvote1$turnout2010_senate)
summary(turnout_1month)

# 3 months before the election
with(callsvote1, cor(election2010_3monthsbefore_pc, turnout2010_senate, use = "complete.obs"))
turnout_3month <- lm(callsvote1$election2010_3monthsbefore_pc ~ callsvote1$turnout2010_senate)
summary(turnout_3month)

# 6 months before the election
with(callsvote1, cor(election2010_6monthsbefore_pc, turnout2010_senate, use = "complete.obs"))
turnout_6month <- lm(callsvote1$election2010_6monthsbefore_pc ~ callsvote1$turnout2010_senate)
summary(turnout_6month)

# Turnout vs. 311 calls per VAP -- data set 2 (public calls).
# Note: the model objects from the previous section are overwritten here.

# 1 month around the election
with(callsvote2, cor(election2010_1montharound_pc, turnout2010_senate, use = "complete.obs"))
turnout_1month <- lm(callsvote2$election2010_1montharound_pc ~ callsvote2$turnout2010_senate)
summary(turnout_1month)

# 3 months before the election
with(callsvote2, cor(election2010_3monthsbefore_pc, turnout2010_senate, use = "complete.obs"))
turnout_3month <- lm(callsvote2$election2010_3monthsbefore_pc ~ callsvote2$turnout2010_senate)
summary(turnout_3month)

# 6 months before the election
with(callsvote2, cor(election2010_6monthsbefore_pc, turnout2010_senate, use = "complete.obs"))
turnout_6month <- lm(callsvote2$election2010_6monthsbefore_pc ~ callsvote2$turnout2010_senate)
summary(turnout_6month)

# Turnout vs. 311 calls per VAP -- data set 3 (street calls).
# Note: the model objects from the previous section are overwritten here.

# 1 month around the election
with(callsvote3, cor(election2010_1montharound_pc, turnout2010_senate, use = "complete.obs"))
turnout_1month <- lm(callsvote3$election2010_1montharound_pc ~ callsvote3$turnout2010_senate)
summary(turnout_1month)

# 3 months before the election
with(callsvote3, cor(election2010_3monthsbefore_pc, turnout2010_senate, use = "complete.obs"))
turnout_3month <- lm(callsvote3$election2010_3monthsbefore_pc ~ callsvote3$turnout2010_senate)
summary(turnout_3month)

# 6 months before the election
with(callsvote3, cor(election2010_6monthsbefore_pc, turnout2010_senate, use = "complete.obs"))
turnout_6month <- lm(callsvote3$election2010_6monthsbefore_pc ~ callsvote3$turnout2010_senate)
summary(turnout_6month)

# Repeat the 3-month correlation using raw call counts rather than per-VAP
# rates, for each of the three data sets.
with(callsvote1, cor(election2010_3monthsbefore, turnout2010_senate, use = "complete.obs"))
with(callsvote2, cor(election2010_3monthsbefore, turnout2010_senate, use = "complete.obs"))
with(callsvote3, cor(election2010_3monthsbefore, turnout2010_senate, use = "complete.obs"))
# Authors' note: again very similar.

#add in the only covars we have (race)
# This section used to reference `callsvote`, an object that is never created
# in this script (only callsvote1/2/3 exist after the workspace is cleared),
# so every statement stopped with "object 'callsvote' not found". It now uses
# callsvote1, the same data set the "#updated:" section below is fitted on.
# For each window: correlation, bivariate fit, then the fit with race shares.

# 1 month around the election
cor(callsvote1$election2010_1montharound_pc, callsvote1$turnout2010_senate, use = "complete.obs")
turnout_1month <- lm(callsvote1$election2010_1montharound_pc ~ callsvote1$turnout2010_senate)
summary(turnout_1month)
turnout_1monthrace <- lm(election2010_1montharound_pc ~ turnout2010_senate + pctblack + pcthispanic + pctasian, data = callsvote1)
summary(turnout_1monthrace)

# 3 months before the election
cor(callsvote1$election2010_3monthsbefore_pc, callsvote1$turnout2010_senate, use = "complete.obs")
turnout_3month <- lm(callsvote1$election2010_3monthsbefore_pc ~ callsvote1$turnout2010_senate)
summary(turnout_3month)
turnout_3monthrace <- lm(election2010_3monthsbefore_pc ~ turnout2010_senate + pctblack + pcthispanic + pctasian, data = callsvote1)
summary(turnout_3monthrace)

# 6 months before the election
cor(callsvote1$election2010_6monthsbefore_pc, callsvote1$turnout2010_senate, use = "complete.obs")
turnout_6month <- lm(callsvote1$election2010_6monthsbefore_pc ~ callsvote1$turnout2010_senate)
summary(turnout_6month)
# As in the original, the race-adjusted 6-month model reuses the name
# turnout_6month and so replaces the bivariate fit above.
turnout_6month <- lm(election2010_6monthsbefore_pc ~ turnout2010_senate + pctblack + pcthispanic + pctasian, data = callsvote1)
summary(turnout_6month)

# Updated 3-month models on data set 1, used for the table that follows:
# a bivariate fit and a fit that adds the race shares.
with(callsvote1, cor(election2010_3monthsbefore_pc, turnout2010_senate, use = "complete.obs"))

turnout_3month_all1 <- lm(
  election2010_3monthsbefore_pc ~ turnout2010_senate,
  data = callsvote1
)
summary(turnout_3month_all1)

turnout_3monthrace_all2 <- lm(
  election2010_3monthsbefore_pc ~ turnout2010_senate + pctblack + pcthispanic + pctasian,
  data = callsvote1
)
summary(turnout_3monthrace_all2)

## Write the two per-VAP models to a LaTeX table (F statistic omitted).
library(stargazer)
stargazer(
  turnout_3month_all1,
  turnout_3monthrace_all2,
  out = "addingcovars_turnout_allcalls_3mos.tex",
  title = "Regressing Precinct-level 311 Call Volumes onto Voter Turnout Rates and Covariates",
  covariate.labels = c(
    "Voter Turnout 2010 (Senate)",
    "Percent Black",
    "Percent Hispanic",
    "Percent Asian"
  ),
  dep.var.labels = "311 Calls Per Capita",
  label = "turnoutcovars",
  omit.stat = "f"
)


## Alternative specification preferred by R2: raw counts on both sides, with
## POP100 entered as a control instead of dividing through by population.

turnout_3month_all1a <- lm(
  election2010_3monthsbefore ~ TOT10G3 + POP100,
  data = callsvote1
)
summary(turnout_3month_all1a)

turnout_3monthrace_all2a <- lm(
  election2010_3monthsbefore ~ TOT10G3 + NH_DOJ_BLK + HISPANIC + NH_DOJ_ASN + POP100,
  data = callsvote1
)
summary(turnout_3monthrace_all2a)

## Write the two count models to a LaTeX table (F statistic omitted).
library(stargazer)
stargazer(
  turnout_3month_all1a,
  turnout_3monthrace_all2a,
  out = "REPLICATION1_addingcovars_turnout_allcalls_3mos.tex",
  title = "Regressing Precinct-level 311 Call Volumes onto Voter Turnout and Race",
  covariate.labels = c(
    "Votes Cast 2010 (Senate)",
    "Black Residents",
    "Hispanic Residents",
    "Asian Residents",
    "Population"
  ),
  dep.var.labels = "311 Calls",
  label = "turnoutcovars1",
  omit.stat = "f"
)


# 3-month models for data set 2 (public calls): correlation, bivariate fit,
# and the fit with race shares.
with(callsvote2, cor(election2010_3monthsbefore_pc, turnout2010_senate, use = "complete.obs"))
turnout_3month_pub <- lm(callsvote2$election2010_3monthsbefore_pc ~ callsvote2$turnout2010_senate)
summary(turnout_3month_pub)
turnout_3monthrace_pub <- lm(
  election2010_3monthsbefore_pc ~ turnout2010_senate + pctblack + pcthispanic + pctasian,
  data = callsvote2
)
summary(turnout_3monthrace_pub)


# Recompute the race shares on data set 1.
# NOTE(review): the same three columns are already computed from the same
# inputs earlier in the script, so this looks redundant -- confirm before
# removing.
callsvote1$pctblack <- with(callsvote1, NH_DOJ_BLK / POP100)
callsvote1$pcthispanic <- with(callsvote1, HISPANIC / POP100)
callsvote1$pctasian <- with(callsvote1, NH_DOJ_ASN / POP100)

# 3-month models for data set 3 (street calls): same three steps.
with(callsvote3, cor(election2010_3monthsbefore_pc, turnout2010_senate, use = "complete.obs"))
turnout_3month_street <- lm(callsvote3$election2010_3monthsbefore_pc ~ callsvote3$turnout2010_senate)
summary(turnout_3month_street)
turnout_3monthrace_street <- lm(
  election2010_3monthsbefore_pc ~ turnout2010_senate + pctblack + pcthispanic + pctasian,
  data = callsvote3
)
summary(turnout_3monthrace_street)



